1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

Data Source https://quantdev.ssri.psu.edu/tutorials/intro-basic-exploratory-factor-analysis

“For this example, we use data from the web that are collected and distributed at https://openpsychometrics.org/_rawdata/. The data were obtained from 19,719 participants (rows) who provided answers to the Big Five Personality Test, constructed with items from the International Personality Item Pool. Data columns include gender, age, race, native language, country, and answers to the 50 likert rated statements (1-5;0 if missed; 1 was labeled as “strongly disagree”, 2 was labeled as “disagree”, 3 was labeled as “neither agree not disagree”, 4 was labeled as “agree” and 5 was labeled as “strongly agree”.) The original files can be obtaned at http://openpsychometrics.org/_rawdata/BIG5.zip

1.2 The Data

BigData <- as.data.frame(read_excel("~/GitHub/LatentBiomarkers/Data/BigData.xlsx"))

BigData[BigData==0] <- NA 
BigData <- BigData[complete.cases(BigData),]
BigData <- BigData[BigData$age<100,]

BigData <- BigData[,-c(1,3,5,6,7)]

BigData$gender <- 1*(BigData$gender==1)

1.2.0.1 Standarize the names for the reporting

studyName <- "Personality"
dataframe <- BigData
outcome <- "gender"

TopVariables <- 10
thro <- 0.20
cexheat = 0.25

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
19303 51
pander::pander(table(dataframe[,outcome]))
0 1
11831 7472

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.7683127

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 51 , Uni p: 0.007904174 , Uncorrelated Base: 2 , Outcome-Driven Size: 0 , Base Size: 2 
#> 
#> 
 1 <R=0.768,r=0.484,N=   33>, Top: 11( 4 )[ 1 : 11 Fa= 11 : 0.535 ]( 11 , 17 , 0 ),<|>Tot Used: 28 , Added: 17 , Zero Std: 0 , Max Cor: 0.571
#> 
 2 <R=0.571,r=0.385,N=   33>, Top: 7( 4 )[ 1 : 7 Fa= 14 : 0.385 ]( 7 , 12 , 11 ),<|>Tot Used: 36 , Added: 12 , Zero Std: 0 , Max Cor: 0.468
#> 
 3 <R=0.468,r=0.334,N=   33>, Top: 8( 5 )[ 1 : 8 Fa= 15 : 0.334 ]( 7 , 12 , 14 ),<|>Tot Used: 43 , Added: 12 , Zero Std: 0 , Max Cor: 0.416
#> 
 4 <R=0.416,r=0.308,N=   33>, Top: 9( 1 )[ 1 : 9 Fa= 16 : 0.308 ]( 8 , 13 , 15 ),<|>Tot Used: 48 , Added: 13 , Zero Std: 0 , Max Cor: 0.400
#> 
 5 <R=0.400,r=0.300,N=   33>, Top: 5( 2 )[ 1 : 5 Fa= 19 : 0.300 ]( 4 , 5 , 16 ),<|>Tot Used: 48 , Added: 5 , Zero Std: 0 , Max Cor: 0.341
#> 
 6 <R=0.341,r=0.270,N=   33>, Top: 7( 1 )[ 1 : 7 Fa= 20 : 0.270 ]( 6 , 6 , 19 ),<|>Tot Used: 48 , Added: 6 , Zero Std: 0 , Max Cor: 0.264
#> 
 7 <R=0.264,r=0.232,N=   33>, Top: 9( 3 )[ 1 : 9 Fa= 21 : 0.232 ]( 7 , 10 , 20 ),<|>Tot Used: 49 , Added: 10 , Zero Std: 0 , Max Cor: 0.266
#> 
 8 <R=0.266,r=0.233,N=   33>, Top: 1( 1 )[ 1 : 1 Fa= 21 : 0.233 ]( 1 , 1 , 21 ),<|>Tot Used: 49 , Added: 1 , Zero Std: 0 , Max Cor: 0.231
#> 
 9 <R=0.231,r=0.215,N=   16>, Top: 6( 1 )[ 1 : 6 Fa= 22 : 0.215 ]( 6 , 9 , 21 ),<|>Tot Used: 49 , Added: 9 , Zero Std: 0 , Max Cor: 0.227
#> 
 10 <R=0.227,r=0.214,N=   16>, Top: 3( 2 )[ 1 : 3 Fa= 23 : 0.214 ]( 3 , 4 , 22 ),<|>Tot Used: 49 , Added: 4 , Zero Std: 0 , Max Cor: 0.212
#> 
 11 <R=0.212,r=0.206,N=   16>, Top: 6( 2 )[ 1 : 6 Fa= 26 : 0.206 ]( 6 , 7 , 23 ),<|>Tot Used: 50 , Added: 7 , Zero Std: 0 , Max Cor: 0.203
#> 
 12 <R=0.203,r=0.202,N=   16>, Top: 1( 1 )[ 1 : 1 Fa= 26 : 0.202 ]( 1 , 1 , 26 ),<|>Tot Used: 50 , Added: 1 , Zero Std: 0 , Max Cor: 0.200
#> 
 13 <R=0.200,r=0.200,N=    2>, Top: 1( 1 )[ 1 : 1 Fa= 26 : 0.200 ]( 1 , 1 , 26 ),<|>Tot Used: 50 , Added: 1 , Zero Std: 0 , Max Cor: 0.200
#> 
 14 <R=0.200,r=0.200,N=    0>
#> 
 [ 14 ], 0.1997993 Decor Dimension: 50 Nused: 50 . Cor to Base: 22 , ABase: 2 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

204

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

185

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

1.73

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

2.93

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.1997993

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")



univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
N1 2.93 1.33 3.47 1.252 0 0.614
A4 3.81 1.11 4.17 0.973 0 0.595
A5 2.39 1.19 2.02 1.085 0 0.592
A9 3.72 1.15 4.08 1.019 0 0.592
N2 3.46 1.18 3.09 1.150 0 0.589
N6 2.74 1.32 3.13 1.296 0 0.585
A6 3.68 1.18 4.03 1.078 0 0.585
N3 3.62 1.22 3.99 1.060 0 0.584
A7 2.36 1.16 2.03 1.091 0 0.582
A1 2.52 1.36 2.18 1.354 0 0.580


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
La_N2 4.53 1.097 4.240 1.058 5.97e-14 0.582
La_N8 1.65 1.304 2.015 1.303 0.00e+00 0.579
La_E2 4.86 1.167 4.558 1.094 0.00e+00 0.578
La_A7 2.90 1.036 2.640 0.980 0.00e+00 0.578
La_A4 4.77 1.006 4.998 0.879 0.00e+00 0.572
La_E9 1.05 1.172 0.785 1.202 0.00e+00 0.564
La_C8 3.66 1.049 3.456 0.993 4.17e-05 0.556
La_C9 2.27 1.116 2.474 1.082 0.00e+00 0.555
La_N1 1.65 1.016 1.810 0.935 0.00e+00 0.553
O10 4.12 0.946 3.936 0.999 0.00e+00 0.552

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
3.3 46 0.902


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
N1 NA 2.93 1.328 3.471 1.252 0.00e+00 0.614 0.614 NA
A4 NA 3.81 1.111 4.172 0.973 0.00e+00 0.595 0.595 NA
N2 NA 3.46 1.184 3.094 1.150 0.00e+00 0.589 0.589 NA
N6 NA 2.74 1.322 3.134 1.296 0.00e+00 0.585 0.585 NA
N3 NA 3.62 1.221 3.986 1.060 0.00e+00 0.584 0.584 NA
La_N2 + 1.000N2 + 0.165N8 + 0.230*N10 4.53 1.097 4.240 1.058 5.97e-14 0.582 0.589 1
A7 NA 2.36 1.164 2.034 1.091 0.00e+00 0.582 0.582 NA
La_N8 + 1.000N8 -0.259A3 -0.075A4 -0.031A7 1.65 1.304 2.015 1.303 0.00e+00 0.579 0.565 11
La_E2 + 0.289E1 + 1.000E2 + 0.338*E3 4.86 1.167 4.558 1.094 0.00e+00 0.578 0.569 3
La_A7 + 0.044E1 + 0.343E3 -0.196A3 -0.057A4 + 0.977*A7 2.90 1.036 2.640 0.980 0.00e+00 0.578 0.582 4
La_A4 + 1.000A4 + 0.406A7 4.77 1.006 4.998 0.879 0.00e+00 0.572 0.595 8
E2 NA 2.96 1.328 2.635 1.288 0.00e+00 0.569 0.569 NA
N8 NA 2.61 1.338 2.925 1.345 0.00e+00 0.565 0.565 NA
La_E9 -0.368E1 -0.314E3 + 1.000E9 -0.060N8 1.05 1.172 0.785 1.202 0.00e+00 0.564 0.545 -1
A3 NA 2.33 1.265 2.059 1.170 0.00e+00 0.560 0.560 NA
La_C8 + 0.019age -0.189N8 + 0.329C1 + 1.000C8 3.66 1.049 3.456 0.993 4.17e-05 0.556 0.534 -3
La_C9 -0.411C1 + 0.184C6 + 1.000*C9 2.27 1.116 2.474 1.082 0.00e+00 0.555 0.549 0
La_N1 + 1.000N1 + 0.293N2 -0.349N3 -0.313N6 -0.066*N8 1.65 1.016 1.810 0.935 0.00e+00 0.553 0.614 -4
O10 4.12 0.946 3.936 0.999 0.00e+00 0.552 0.552 7
C9 NA 3.09 1.252 3.306 1.235 0.00e+00 0.549 0.549 NA
E9 NA 3.23 1.371 3.007 1.406 0.00e+00 0.545 0.545 NA
age NA 26.69 11.390 25.926 11.576 0.00e+00 0.535 0.535 1
C8 NA 2.57 1.153 2.428 1.116 0.00e+00 0.534 0.534 NA
N10 NA 2.76 1.303 2.883 1.317 0.00e+00 0.526 0.526 NA
E3 NA 3.38 1.233 3.439 1.236 0.00e+00 0.513 0.513 NA
C1 NA 3.30 1.106 3.330 1.093 0.00e+00 0.507 0.507 6
E1 NA 2.62 1.244 2.635 1.225 0.00e+00 0.504 0.504 9
C6 NA 2.92 1.382 2.926 1.408 0.00e+00 0.501 0.501 NA

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 10277 1554
1 5288 2184
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.194 0.188 0.199
    tp 0.387 0.380 0.394
    se 0.292 0.282 0.303
    sp 0.869 0.862 0.875
    diag.ac 0.646 0.639 0.652
    diag.or 2.731 2.539 2.938
    nndx 6.213 5.636 6.924
    youden 0.161 0.144 0.177
    pv.pos 0.584 0.568 0.600
    pv.neg 0.660 0.653 0.668
    lr.pos 2.225 2.099 2.359
    lr.neg 0.815 0.802 0.828
    p.rout 0.806 0.801 0.812
    p.rin 0.194 0.188 0.199
    p.tpdn 0.131 0.125 0.138
    p.tndp 0.708 0.697 0.718
    p.dntp 0.416 0.400 0.432
    p.dptn 0.340 0.332 0.347
  • tab:

      Outcome + Outcome - Total
    Test + 2184 1554 3738
    Test - 5288 10277 15565
    Total 7472 11831 19303
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.646 0.639 0.652
3 se 0.292 0.282 0.303
4 sp 0.869 0.862 0.875
6 diag.or 2.731 2.539 2.938

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 10923 908
1 5970 1502
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.1249 0.120 0.1296
    tp 0.3871 0.380 0.3940
    se 0.2010 0.192 0.2103
    sp 0.9233 0.918 0.9280
    diag.ac 0.6437 0.637 0.6504
    diag.or 3.0266 2.771 3.3057
    nndx 8.0470 7.232 9.0670
    youden 0.1243 0.110 0.1383
    pv.pos 0.6232 0.604 0.6426
    pv.neg 0.6466 0.639 0.6538
    lr.pos 2.6192 2.425 2.8292
    lr.neg 0.8654 0.855 0.8763
    p.rout 0.8751 0.870 0.8798
    p.rin 0.1249 0.120 0.1296
    p.tpdn 0.0767 0.072 0.0817
    p.tndp 0.7990 0.790 0.8080
    p.dntp 0.3768 0.357 0.3965
    p.dptn 0.3534 0.346 0.3607
  • tab:

      Outcome + Outcome - Total
    Test + 1502 908 2410
    Test - 5970 10923 16893
    Total 7472 11831 19303
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.644 0.637 0.650
3 se 0.201 0.192 0.210
4 sp 0.923 0.918 0.928
6 diag.or 3.027 2.771 3.306

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 10210 1621
1 5147 2325
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.204 0.199 0.210
    tp 0.387 0.380 0.394
    se 0.311 0.301 0.322
    sp 0.863 0.857 0.869
    diag.ac 0.649 0.643 0.656
    diag.or 2.845 2.648 3.057
    nndx 5.742 5.237 6.356
    youden 0.174 0.157 0.191
    pv.pos 0.589 0.574 0.605
    pv.neg 0.665 0.657 0.672
    lr.pos 2.271 2.146 2.403
    lr.neg 0.798 0.785 0.812
    p.rout 0.796 0.790 0.801
    p.rin 0.204 0.199 0.210
    p.tpdn 0.137 0.131 0.143
    p.tndp 0.689 0.678 0.699
    p.dntp 0.411 0.395 0.426
    p.dptn 0.335 0.328 0.343
  • tab:

      Outcome + Outcome - Total
    Test + 2325 1621 3946
    Test - 5147 10210 15357
    Total 7472 11831 19303
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.649 0.643 0.656
3 se 0.311 0.301 0.322
4 sp 0.863 0.857 0.869
6 diag.or 2.845 2.648 3.057


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 10198 1633
1 5151 2321
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.205 0.199 0.211
    tp 0.387 0.380 0.394
    se 0.311 0.300 0.321
    sp 0.862 0.856 0.868
    diag.ac 0.649 0.642 0.655
    diag.or 2.814 2.619 3.023
    nndx 5.794 5.280 6.420
    youden 0.173 0.156 0.189
    pv.pos 0.587 0.571 0.602
    pv.neg 0.664 0.657 0.672
    lr.pos 2.250 2.127 2.381
    lr.neg 0.800 0.786 0.813
    p.rout 0.795 0.789 0.801
    p.rin 0.205 0.199 0.211
    p.tpdn 0.138 0.132 0.144
    p.tndp 0.689 0.679 0.700
    p.dntp 0.413 0.398 0.429
    p.dptn 0.336 0.328 0.343
  • tab:

      Outcome + Outcome - Total
    Test + 2321 1633 3954
    Test - 5151 10198 15349
    Total 7472 11831 19303
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.649 0.642 0.655
3 se 0.311 0.300 0.321
4 sp 0.862 0.856 0.868
6 diag.or 2.814 2.619 3.023
  par(op)

1.12 The Latent formulas

theLaFormulas <- getLatentCoefficients(DEdataframe)
                       

pander::pander(theLaFormulas)
  • La_E2:

    E1 E2 E3
    0.289 1 0.338
  • La_E3:

    E1 E3 E7 N8 N10
    -0.555 0.857 0.212 0.0823 0.206
  • La_E4:

    E1 E2 E3 E4
    0.239 -0.305 0.218 1
  • La_E5:

    E1 E2 E3 E5 E7
    0.0619 0.214 -0.278 1 -0.389
  • La_E6:

    E1 E2 E3 E6 A3 A4 A7 O10
    0.168 -0.398 -0.0625 1 -0.0411 -0.012 -0.163 0.208
  • La_E7:

    E1 E2 E3 E7
    -0.305 0.201 -0.605 1
  • La_E8:

    E8 E9
    1 0.468
  • La_E9:

    E1 E3 E9 N8
    -0.368 -0.314 1 -0.0598
  • La_E10:

    E1 E2 E3 E4 E7 E10
    0.0377 -0.137 0.063 -0.228 0.293 1
  • La_N1:

    N1 N2 N3 N6 N8
    1 0.293 -0.349 -0.313 -0.0664
  • La_N2:

    N2 N8 N10
    1 0.165 0.23
  • La_N3:

    N2 N3 N6 N8
    0.224 1 -0.313 -0.0695
  • La_N4:

    N4 N8 N10
    1 0.0472 0.341
  • La_N5:

    N5 N6 N8
    1 -0.366 -0.176
  • La_N6:

    N2 N6 N8
    0.261 1 -0.453
  • La_N7:

    N7 N8
    1 -0.739
  • La_N8:

    N8 A3 A4 A7
    1 -0.259 -0.0754 -0.0306
  • La_N9:

    N6 N8 N9 A3
    -0.442 -0.276 1 -0.192
  • La_N10:

    N8 N10
    -0.526 1
  • La_A1:

    A1 A4 A7
    1 0.291 -0.298
  • La_A2:

    A2 A4 A7
    1 -0.188 0.471
  • La_A3:

    A3 A4 A7
    1 0.291 0.118
  • La_A4:

    A4 A7
    1 0.406
  • La_A5:

    A4 A5 A7
    0.347 1 -0.428
  • La_A6:

    A4 A6
    -0.553 1
  • La_A7:

    E1 E3 A3 A4 A7
    0.044 0.343 -0.196 -0.0572 0.977
  • La_A8:

    A4 A7 A8
    -0.351 0.212 1
  • La_A9:

    A4 A6 A9
    -0.567 -0.196 1
  • La_A10:

    E1 E3 A4 A7 A10
    -0.138 -0.273 -0.224 -0.0909 1
  • La_C2:

    C2 C6
    1 -0.498
  • La_C3:

    C3 C10
    1 -0.334
  • La_C4:

    N8 C2 C4 C6
    -0.241 -0.193 1 -0.312
  • La_C5:

    C1 C5 C6 C9
    -0.193 1 0.346 -0.227
  • La_C6:

    C1 C6
    0.399 1
  • La_C7:

    C1 C7 C9
    -0.18 1 -0.299
  • La_C8:

    age N8 C1 C8
    0.0187 -0.189 0.329 1
  • La_C9:

    C1 C6 C9
    -0.411 0.184 1
  • La_C10:

    C1 C10
    -0.304 1
  • La_O1:

    O1 O10
    1 -0.341
  • La_O2:

    O1 O2 O10
    0.333 1 0.248
  • La_O3:

    O3 O10
    1 -0.442
  • La_O4:

    O2 O4
    -0.523 1
  • La_O5:

    O5 O10
    1 -0.595
  • La_O6:

    O3 O6 O10
    0.558 1 0.223
  • La_O7:

    O2 O5 O7 O10
    0.298 -0.239 1 -0.0576
  • La_O8:

    O1 O8
    -0.715 1